version 16.1
clear all
cd "MYPATH\draft_tab_fig"
adopath + ../ado/

cap log close
pause on

log using descript_tables.log, replace

preliminaries
foreach PATH in RESULTS TEMP {
	cap mkdir "${`PATH'}\draft_tab_fig"
    if "`PATH'" != "TEMP" cap mkdir "${`PATH'}\draft_tab_fig\figures"
}

program main
	sample_selection
	sum_stats
	nt_policy_var
	nipt_policy_var
	cap log close
end

program sample_selection 
	* Sample selection table
	
	// All NT screenings for pregs with expected due date from 2011 to nov302019
    use "${DATA}\KUB_data\cleaned_integrated_kub.dta", clear
	
		* Assign pregnancy exp due date (bc using KUB data before we calculated exp due date)
		qui {
			gen date_preg = .
			format date_preg %td
			label var date_preg "Pregnancy Date (assigned)"
			
			* (280 - Gestational age at KUB testing) + KUB testing date (from KUB) 
			replace date_preg = (280 - ga_vid_dagar) + undersokningsdatum
			
			* Date of last period + 280 (from KUB)
			replace date_preg = sm_datum + 280 if mi(date_preg) & !mi(sm_datum)
			replace date_preg = . if abs(sm_datum - undersokningsdatum) >= 365 & mi(date_preg)
			
			* If still missing, assign based on KUB test date. Assume test occurs at end of 
			* 12th week. So birth month is 28 weeks (196 days) after test date.
			replace date_preg = undersokningsdatum + 196 if mi(date_preg)
			
			gen month = month(date_preg)
			gen year = year(date_preg)
		}
		
	gen m_year = ym(year, month)
	format m_year %tm
	drop if year(undersokningsdatum) < 2011 | year(undersokningsdatum) > 2019
	qui unique lopnr undersokningsdatum foster_risk 
	local n_screen = r(unique)
	qui unique lopnr undersokningsdatum 
	local n_preg = r(unique)
	qui unique lopnr 
	local n_mom = r(unique)
	matrix samp_select = nullmat(samp_select) \ `n_screen', `n_preg', `n_mom'
	
	// Singleton pregnancies (and NT coverage introduced in region)
	use "$DATA\analysis_sample", clear
	drop if wave == 1
	qui unique pregnancy 
	local n_screen = r(unique)
	qui unique pregnancy 
	local n_preg = r(unique)
	qui unique lopnr 
	local n_mom = r(unique)
	matrix samp_select = nullmat(samp_select) \ `n_screen', `n_preg', `n_mom'
	
	// Risk greater than 1/1000
	drop if fetus_risk > 1000
	qui unique pregnancy 
	local n_screen = r(unique)
	qui unique pregnancy 
	local n_preg = r(unique)
	qui unique lopnr 
	local n_mom = r(unique)
	matrix samp_select = nullmat(samp_select) \ `n_screen', `n_preg', `n_mom'
	
	// Intermediate risk coverage regions 
	qui unique pregnancy if nipt_51_200 == 1
	local n_screen = r(unique)
	qui unique pregnancy if nipt_51_200 == 1
	local n_preg = r(unique)
	qui unique lopnr if nipt_51_200 == 1
	local n_mom = r(unique)
	matrix samp_select = nullmat(samp_select) \ `n_screen', `n_preg', `n_mom'
	
	// High risk coverage regions 
	qui unique pregnancy if nipt_hr_cov == 1
	local n_screen = r(unique)
	qui unique pregnancy if nipt_hr_cov == 1
	local n_preg = r(unique)
	qui unique lopnr if nipt_hr_cov == 1
	local n_mom = r(unique)
	matrix samp_select = nullmat(samp_select) \ `n_screen', `n_preg', `n_mom'

	frmttable using "${RESULTS}\draft_tab_fig\samp_select.tex", statmat(samp_select) ///
	  rtitles("All NT screenings" \ "Singleton pregnancies" \ ///
	    "Pregnancy risk $\geq$ 1:1000" \ "NIPT coverage in intermediate-risk range" \ ///
		"NIPT coverage in high-risk range")  ///
	  ctitles("" "Screenings" "Pregnancies" "Women") ///
	  tex sdec(0) replace
	
end

program sum_stats
	use "$DATA\analysis_sample", clear
	merge 1:1 pregnancy using $DATA\all_screens_universalnt_w_xs, assert(3) keep(3) ///
	  keepusing(married mom_foreign dv_prev_kids prev_mis_still prev_birth_issue prev_concern prev_any_q_icd)
	local dollar_per_sek = 567.5 / 5000
	label var did_nipt "Share got NIPT"
	label var did_invasive "Share got invasive" 
	qui {
		gen count_preg = 1
		qui gen age1 = 1 * (age < 25)
		qui gen age2 = 1 * (age >= 25 & age < 35)
		qui gen age3 = 1 * (age >= 35 & !mi(age))
		gen no_college = 1 * (educ == 1)
		gen some_college = 1 * (educ == 2)
		gen college = 1 * (educ == 3) 
		gen mi_educ = 1 * mi(educ)
		replace mom_foreign = 1 * (mom_foreign == 2)
		gen risk = "vlow" if fetus_risk > 1000
		replace risk = "low" if fetus_risk <= 1000
		replace risk = "med" if fetus_risk <= 200
		replace risk = "high" if fetus_risk <= 50
		replace hh_inc_smooth = hh_inc_smooth * `dollar_per_sek' 
		forval i = 1/4 {
			qui gen inc_quartile`i' = 1 * (inc_quartile == `i')
		}
		qui gen inc_quartile_mis = 1 * (mi(inc_quartile))
	}
	local mom_vars married mom_foreign age age1 age2 age3 ///
	  hh_inc_smooth inc_quartile1 inc_quartile2 inc_quartile3 inc_quartile4 inc_quartile_mis  ///
	  no_college some_college college mi_educ ///
	  dv_prev_kids prev_birth_issue prev_concern prev_any_q_icd
	local tab_vars subt did_nipt did_invasive live_birth healthy chroma no_live ///
	  termination_clean stillbirth
	    
	tempfile sum_stat_samp 
	save `sum_stat_samp', replace
	
	di "Share missing education"
	qui count 
	local tot_preg = r(N)
	qui count if mi(educ)
	local mi_educ_preg = r(N)
	di `mi_educ_preg' / `tot_preg' * 100
	foreach samp in "analysis" {
		foreach gr in  "all" "thou" "51_200"  {
			foreach var in `mom_vars' `tab_vars' {
				use `sum_stat_samp', clear
				if "`gr'" == "thou" drop if fetus_risk > 1000
				if "`gr'" == "51_200" keep if nipt_51_200 == 1 & fetus_risk <= 1000
				if inlist("`var'", "live_birth", "healthy", "chroma", "no_live", ///
				  "termination_clean", "stillbirth") {
				  	di "`gr'"
				  	di "`var'"
					count if `var' == 1
				  	keep if m_year < ym(2019,12)
					count
					count if `var' == 1
				  }
				qui sum `var'
				matrix temp_mat = nullmat(temp_mat) \ r(mean) 
			}
			use `sum_stat_samp', clear
			if "`gr'" == "thou" drop if fetus_risk > 1000
			if "`gr'" == "51_200" keep if nipt_51_200 == 1 & fetus_risk <= 1000
			qui count
			matrix temp_mat = r(N) \ nullmat(temp_mat)
			matrix sum_stats = nullmat(sum_stats), temp_mat 
			matrix drop temp_mat
		}
		
		frmttable using "${RESULTS}\draft_tab_fig\sum_stats_`samp'.tex", statmat(sum_stats) ///
		  rtitles("Number of pregnancies" \ "Married" \ "Foreign-born" \ ///
		    "Maternal age" \ "< 25" \ "25-34" \ "35+" \ ///
			"Household income" \ "Income: q1" \ "Income: q2" \ "Income: q3" \ ///
			"Income: q4" \ "Income: missing" \ ///
			"No college" \ "Some college" \ "College graduate" \ "Missing" \ ///
			"Any previous kids" \ ///
			"Previous birth issue" \ "Previous mis/still/death within 28/pre-term live" \ ///
			"Previous congen deform or CA " \ ///
			"Any post-NT testing" \ "cfDNA testing" \ "Invasive testing" \ ///
			"Live birth" \ "Live birth, no CA" \ "Live birth, CA" \ ///
			"No live birth" \ "Ended before 22 weeks" \ "Stillbirth")   ///
		  ctitles("", "All singleton pregnancies", "$ p \geq 1/1000$", "[1/51, 1/200] NIPT coverage") ///
		  tex sdec(0 \ 2 \ 3\ 3 \ 3 \ 2 \ 3 \ 3 \ 3 \3 \ 3 \ 3 \ 3 \ 3 \ 3 \ 3 ///
		    \ 3 \ 3\ 3 \ 3\ 3\ 3 \ 3 \ 3 \ 3 \ 3 \ 3 \ 3) replace
		
		putexcel set "${RESULTS}\draft_tab_fig\sum_stats_`samp'", replace
		putexcel A1 = matrix(sum_stats), nformat(number_d2)

		matrix drop sum_stats
	}
	
end

program nt_policy_var
	di "Share pregs in MBR in region-months of each type of KUB coverage"
	use lopnr childid bfoddat birth_flag nonsingleton num_birth_records grdbs grvfv grdfv ///
	  bpsmdat bpuldat bdiag* ///
	  using "MYPATH\MYPATH.dta", clear
    format lopnr  %15.0f
	di "Extract birth date"
	gen bfoddat_month = mod(bfoddat, 100)
	gen bfoddat_yr = floor(bfoddat/100)
	gen year = bfoddat_yr
	merge m:1 lopnr year using "MYPATH\MYPATH.dta", assert(1 2 3) keep(1 3) ///
		 generate(mother_in_tax_data) keepusing(age lan)
	* Calc week 10 date
	*gen bfo_15 = mdy(bfoddat_month, 15, bfoddat_yr) if mi(bpuldat)
	gen bp_yr = floor(bpuldat/10000)
	gen bp_m = floor(bpuldat/100)
	replace bp_m = mod(bp_m, 100)
	gen bp_d = mod(bpuldat, 100)
	gen bpuldat_num = mdy(bp_m, bp_d, bp_yr)
	format bpuldat_num %td
	format bpuldat %12.0f
	list bpuldat bpuldat_num bp_d bp_m bp_yr in 1/50
	rename bpuldat bpuldat_orig
	rename bpuldat_num bpuldat
	gen bfo_15 = mdy(bfoddat_month, 15, bfoddat_yr) if mi(bpuldat) | bp_yr < 2000 | bp_yr > 2020
	gen week10_date = bpuldat - 210
	replace week10_date = bfo_15 - 210 if mi(bpuldat) | bp_yr < 2000 | bp_yr > 2020
	format bfo_15 bpuldat week10_date %td
	keep if year >= 2011
	tab bp_yr
	count if mi(week10_date)
	di "There are `r(N)' observations with missing week10_date."
	list bpuldat_orig bpuldat bfo_15 if mi(week10_date)
	
	* merge waves
	gen m_year = ym(bfoddat_yr, bfoddat_month)
	di "# missing lan, then drop them"
	count if mi(lan)
	drop if mi(lan)
	merge m:1 lan using $DATA\waves.dta, assert(2 3) keep(3) gen(wave_merge)
	assert wave_merge == 3
	qui drop wave_merge 
	qui gen wave = 1 if week10_date < dofm(wave2_intro)
	qui replace wave = 2 if week10_date >= dofm(wave2_intro)
	qui replace wave = 3 if week10_date >= dofm(wave3_intro)
	qui define_kub_type

	foreach group in all tw19 {
		preserve
		if "`group'" == "tw19" keep if year == 2019
		qui count 
		local tot_preg = r(N)
		
		di "share universal"
		qui count if kub_type == 2
		local share_univ = r(N) / `tot_preg' * 100
		
		di "share 35+"
		qui count if kub_type == 1
		local share35 = r(N) / `tot_preg' * 100
		
		di "share 38+"
		qui count if kub_type == 3
		local share38 = r(N) / `tot_preg' * 100
		
		di "No NT coverage"
		qui count if kub_type == 0
		local share_nocov = r(N) / `tot_preg' * 100
		
		matrix `group'_ntcov_shares = `share_univ' \ `share35' \ `share38' \ `share_nocov'
		restore
	}
	lan_stats
	matrix ntcov_shares = all_ntcov_shares, tw19_ntcov_shares
	frmttable using "${RESULTS}\draft_tab_fig\ntcov_shares.tex", statmat(ntcov_shares) ///
		  rtitles("Universal NT" \ "35+ NT" \ "38+ NT" \ "No NT cov" )   ///
		  ctitles("", "2011-2019", "2019") ///
		  tex sdec(2) replace
end

program define_kub_type
	gen kub_type = 0 if wave == 1 
	replace kub_type = 1 if inlist(lan, 3, 4, 9, 12, 14, 20, 21, 22, 23, 24) & wave > 1
	replace kub_type = 2 if inlist(lan, 1, 5, 6, 7, 8, 13, 17, 18, 19) & wave > 1
	* Some Counties modified KUB Offers
	replace kub_type = 1 if lan == 19 & week10_date >= dofm(ym(2018, 1))
	replace kub_type = 2 if lan == 3 & week10_date >= dofm(ym(2016, 1))
	replace kub_type = 2 if lan == 20 & week10_date >= dofm(ym(2017, 11))
	replace kub_type = 2 if lan == 22 & week10_date >= dofm(ym(2015, 1))
	replace kub_type = 2 if lan == 23 & week10_date >= dofm(ym(2016, 4))
	replace kub_type = 1 if lan == 17 & week10_date >= dofm(ym(2014,6)) & week10_date <= dofm(ym(2014, 9))
	replace kub_type = 1 if lan == 17 & week10_date >= dofm(ym(2015,4)) & week10_date <= dofm(ym(2015, 9))
	replace kub_type = 3 if lan == 10 & wave == 2
	replace kub_type = 0 if lan == 10 & wave == 3
	* replace kub_type = 0 if lan == 10 & week10_date >= dofm(ym(2016, 7))
	replace kub_type = 0 if lan == 25
	*** new additions *** (7/27/2022)
	replace kub_type = 2 if lan == 12 & week10_date >= dofm(ym(2018, 3))
	replace kub_type = 1 if lan == 20 & week10_date >= dofw(yw(2018, 27)) & week10_date <= dofw(yw(2018, 32))
	replace kub_type = 1 if lan == 20 & week10_date >= dofw(yw(2019, 27)) & week10_date <= dofw(yw(2019, 30))
	label define kub_t 0 "No NT coverage" 1 "Age 35 KUB threshold" ///
	  2 "Universal KUB coverage" 3 "Age 38 KUB threshold"
	label values kub_type kub_t
end

program lan_stats
	di "Look at tabulations of wave/kub_type by year for each lan separately"
	count
	di "Total obs: `r(N)'"
	format week10_date %td
	count if !mi(week10_date)
	di "There are `r(N)' observations with non missing week10_date"
	format bpuldat %td
	list bpuldat week10_date bfo_15 in 1/50
	codebook week10_date
	gen year_of_week10 = year(week10_date)
	codebook year_of_week10
	tab year_of_week10, mi
	tab year, mi
	tab year year_of_week10, mi
	tab kub_type, mi
	qui sum lan, d
	forvalues i = `r(min)'/`r(max)' {
		count if lan == `i' 
		if `r(N)' == 0 {
			continue
		}
		di "lan: `i'"
		di "Total obs in lan `i' = `r(N)'"
		tab year_of_week10 kub_type if lan == `i'
		tab wave kub_type if lan == `i'
		if `i' == 7 {
			di "Kronoberg has split in 3/2012"
			gen latter = 0
			replace latter = 1 if week10_date >= dofm(ym(2012, 3))
			tab latter kub_type if lan == `i'
			tab latter wave if lan == `i'
			drop latter
		}
		if `i' == 20 {
			di "Dalarna has split in 11/2017" 
			gen latter = 0 
			replace latter = 1 if week10_date >= dofm(ym(2017, 11))
			tab latter kub_type if lan == `i'
			tab latter wave if lan == `i'
			drop latter
		}
		if `i' == 13 {
			di "Halland has split in 5/2017" 
			gen latter = 0
			replace latter = 1 if week10_date >= dofm(ym(2017, 5))
			tab latter kub_type if lan == `i'
			tab latter wave if lan == `i'
			drop latter
		}
		if `i' == 23 {
			di "Jamtland has split in 4/2016"
			gen latter = 0 
			replace latter = 1 if week10_date >= dofm(ym(2016, 4))
			tab latter kub_type if lan == `i'
			tab latter wave if lan == `i'
			drop latter
		}
		if `i' == 22 {
			di "Vasternorrland has split in 3/2012"
			gen latter = 0
			replace latter = 1 if week10_date >= dofm(ym(2012, 3))
			tab latter kub_type if lan == `i'
			tab latter wave if lan == `i'
			drop latter
		}
		if `i' == 10 {
			di "Blekinge has split in 7/2016"
			gen latter = 0 
			replace latter = 1 if week10_date >= dofm(ym(2016, 7))
			tab latter kub_type if lan == `i'
			tab latter wave if lan == `i'
			drop latter
		}
	}
end

program nipt_policy_var
   di "Share of analysis sample"
   use $DATA\analysis_sample, clear
   keep if wave != 1
   keep if fetus_risk <= 1000

	qui gen policy_regime = 1 if inlist(lan, 1, 4, 9, 13, 14, 19, 20, 23) // 51_200
	qui replace policy_regime = 2 if inlist(lan, 3, 17, 21, 22) // 2_200
	qui replace policy_regime = 3 if inlist(lan,7,12) // 51_1000
	qui replace policy_regime = 4 if inlist(lan,18) // orebro
	qui replace policy_regime = 5 if inlist(lan,5,6,8) // 51_300
	qui replace policy_regime = 6 if inlist(lan,10) // blekinge
	qui replace policy_regime = 7 if mi(policy_regime) // Didn't introduce NIPT
	qui count 
	local tot_preg = r(N)
	
	forval i = 1/7 {
		qui count if policy_regime == `i'
		local share = r(N) / `tot_preg'
		if `share' != 0 {
			matrix share_regime = nullmat(share_regime) \ `share' * 100

		}
	}
	frmttable using "${RESULTS}\draft_tab_fig\share_niptregime.tex", statmat(share_regime) ///
		  rtitles("51_200" \ "2_200" \ "51_1000" \ "2_1000" )   ///
		  ctitles("", "Share by cfDNA regime") ///
		  tex sdec(2) replace
end

* EXECUTE 
main
